* This file generates the simulated data for selection tests (age) and produces Figures E1, E2 and E3.

* create base data for simulations
use "$saveddata/data_complete_withcosts.dta", clear

*sample 10 
keep depdur d_age bin
drop if bin>600

* frequencies
bys bin: egen freq = count(depdur)

* bin polynomial variables
forv x = 1/10 {
	gen bin`x' = bin^`x'
	}

* counterfactuals
reg freq bin1-bin10 if bin<180 | bin>400
predict freq_cf, xb

reg d_age bin1-bin10 if bin<180 | bin>260
predict d_age_cf, xb

bys bin: egen sdd_age = sd(d_age)
reg sdd_age bin1-bin10 if bin<180 | bin>260
predict sdd_age_cf, xb

* movers per bin
gen movers = freq_cf - freq
su bin if movers<0 & bin>240
replace movers = 0 if bin>=`r(min)'

* save collapse data 
preserve
collapse (mean) freq freq_cf d_age d_age_cf, by(bin)
compress
save sim_obs.dta, replace
restore

* save temp file
keep d_age* sdd_age_cf freq* movers bin depdur
compress
tempfile temp
save `temp', replace



* create simulation dataset
forv x = 170(10)500 {
	tempfile simsample`x'
	}

* pre and post exclusion window: as per observed data
u `temp', clear
keep if bin<180 | bin>500
save `simsample170', replace

* pre-threshold exclusion window : remove post-threshold movers
u `temp', clear
keep if bin>=180 & bin<=240

qui levelsof bin
foreach i in `r(levels)' {
	preserve
	keep if bin==`i'

	qui su freq_cf 
	local samplen = round(`r(mean)',1)
	sample `samplen', count
	
	save `simsample`i'', replace
	restore
	}
	
* post-threshold exclusion window: replace post-threshold movers
u `temp', clear
keep if bin>240 & bin<=500

qui levelsof bin
foreach i in `r(levels)' {
	preserve
	keep if bin==`i'

	* target sample size
	su freq_cf
	local expandn = r(mean)
	
	* current sample size
	count 
	local currentn = r(N)
	
	* expand multiple
	local n = round(`expandn' / `currentn',1) + 1
	expand `n'
	
	* sample down exactly
	local samplen = round(`expandn',1)
	sample `samplen', count

	save `simsample`i'', replace
	restore
	}
	
* assemble dataset
u `simsample170', clear
forv x = 180(10)500 {
	append using `simsample`x''
	}
	
* fix age distribution: replace with normal distribution based on counterfactual
gen d_age_fix = rnormal(d_age_cf,sdd_age_cf)

* save
keep d_age_fix bin movers depdur
compress
save simsample.dta, replace



* simulations
u simsample, clear

* scenario
local scenario = 4

* error
su d_age_fix
gen e = rnormal(0,r(sd))

* selection equation
local b1 = 1 // 0 0.1 0.5 1
gen selection = 25 + `b1'*d_age_fix + e

* find threshold s.t. enough movers per bin
qui levelsof bin if bin>240 & bin<=400
foreach i in `r(levels)' {
	gen tau`i' = 40
	}

gen d = 0

qui levelsof bin if bin>240 & bin<=400
foreach i in `r(levels)' {
	qui su movers if bin==`i'
	local movers = r(mean)
	
	while `movers'>0 {
		qui replace tau`i' = tau`i'-1
		qui replace d = 1 if selection>tau`i' & bin==`i'
		
		qui count if d==1 & bin==`i'
		local moved = r(N)
		
		qui su movers if bin==`i'
		local movers = r(mean) - `moved'
		
		di "Movers remaining: " `movers'
		qui su tau`i'
		di "Tau" `i' " = " `r(mean)'
		di ""
		}
	}
	
* assign end locations
gen depdur_post = .

qui levelsof bin if bin>=180 & bin<=240
foreach i in `r(levels)' {
	qui su movers if bin==`i'
	local movers = -`r(mean)'

	* list of post-target movers
	gsort -d bin -selection 
	
	* move top of list
	replace depdur_post = `i' if _n<=`movers'
	
	* remove those moved from the list
	replace d = 0 if _n<=`movers'
	}
	
replace depdur_post = depdur if depdur_post==.

* new bins
gen bin_post = 10 if depdur_post<=10
forv x = 20(10)800 {
	local y = `x' - 10
	replace bin_post = `x' if depdur_post>`y' & depdur_post<=`x'
	}
replace bin_post = 810 if depdur_post>800 & depdur_post!=.

* save
collapse (mean) d_age_fix, by(bin_post)
rename bin_post bin
rename d_age_fix age_s`scenario'
save sim_scenario`scenario'.dta, replace
	
	
* results
u sim_obs, clear
rename d_age age
rename d_age_cf age_cf
forv x = 1/4 {
	merge 1:1 bin using sim_scenario`x'
	drop if _merge==2
	drop _merge
	}
 
* Make Figure E1
line age_cf bin, lpattern(dash) lcolor(black)  ///
	xtick(0(30)600) xmtick(0(10)600) graphregion(color(white)) xline(240, lcolor(red) lpattern(shortdash)) ///
	xline(180, lcolor(gray) lpattern(dash)) xline(380, lcolor(gray) lpattern(dash)) ///
	legend(lab(1 "Counterfactual") cols(1) ring(0) pos(5)) ylab(10(10)60) xlab(0(60)600) ///
	ytitle("Age") xtit("Wait time (mins)") name(simselection1, replace) 
graph export "$results/FigE1a.pdf", as(pdf) replace


line age_cf age_s1 bin, lpattern(dash solid) lcolor(black red) lwidth(thin thick) ///
	xtick(0(30)600) xmtick(0(10)600) graphregion(color(white)) xline(240, lcolor(red) lpattern(shortdash)) ///
	xline(180, lcolor(gray) lpattern(dash)) xline(380, lcolor(gray) lpattern(dash)) ///
	legend(lab(1 "Counterfactual") lab(2 "Random selection") cols(1) ring(0) pos(5)) ylab(10(10)60) xlab(0(60)600) ///
	ytitle("Age") xtit("Wait time (mins)") name(simselection2, replace) 
graph export "$results/FigE1b.pdf", as(pdf) replace

*Make Figure E2
scatter age bin, msymbol(o) connect(line) lcolor(black) mcolor(black) || ///
	line age_s1 bin, lpattern(solid) lcolor(red) lwidth(thick) ///
	xtick(0(30)600) xmtick(0(10)600) graphregion(color(white)) xline(240, lcolor(red) lpattern(shortdash)) ///
	xline(180, lcolor(gray) lpattern(dash)) xline(380, lcolor(gray) lpattern(dash)) ylab(10(10)60) xlab(0(60)600) ///
	legend(lab(1 "Observed") lab(2 "Random selection") cols(1) ring(0) pos(5)) ///
	ytitle("Age") xtit("Wait time (mins)") name(simselection2alt, replace) 
graph export "$results/FigE2.pdf", as(pdf) replace

* Make Figure E3
*****************

line age_cf age_s1 bin, lpattern(dash solid) lcolor(black red) lwidth(thin thick) ///
	xtick(0(30)600) xmtick(0(10)600) graphregion(color(white)) xline(240, lcolor(red) lpattern(shortdash)) ///
	xline(180, lcolor(gray) lpattern(dash)) xline(380, lcolor(gray) lpattern(dash)) ///
	legend(lab(1 "Counterfactual") lab(2 "Random selection") cols(1) ring(0) pos(5)) ylab(10(10)60) xlab(0(60)600) ///
	ytitle("Age") xtit("Wait time (mins)") name(simselection2, replace) 
graph export "$results/FigE3a.pdf", as(pdf) replace

line age_s1 age_s4 bin, lpattern(dash solid solid) lcolor(red navy) lwidth(thick med) ///
	xtick(0(30)600) xmtick(0(10)600) graphregion(color(white)) xline(240, lcolor(red) lpattern(shortdash)) ///
	xline(180, lcolor(gray) lpattern(dash)) xline(380, lcolor(gray) lpattern(dash)) ylab(10(10)60) xlab(0(60)600) ///
	legend(lab(1 "Random selection") lab(2 "Selection, {&beta} = 1") cols(1) ring(0) pos(5)) ///
	ytitle("Age") xtit("Wait time (mins)") name(simselection3, replace) 
graph export "$results/FigE3b.pdf", as(pdf) replace

line age_s1 age_s3 bin, lpattern(dash solid solid) lcolor(red navy) lwidth(thick med) ///
	xtick(0(30)600) xmtick(0(10)600) graphregion(color(white)) xline(240, lcolor(red) lpattern(shortdash)) ///
	xline(180, lcolor(gray) lpattern(dash)) xline(380, lcolor(gray) lpattern(dash)) ylab(10(10)60) xlab(0(60)600) ///
	legend(lab(1 "Random selection") lab(2 "Selection, {&beta} = 0.5") cols(1) ring(0) pos(5)) ///
	ytitle("Age") xtit("Wait time (mins)") name(simselection4, replace)  
graph export "$results/FigE3c.pdf", as(pdf) replace

line age_s1 age_s2 bin, lpattern(dash solid solid) lcolor(red navy) lwidth(thick med) ///
	xtick(0(30)600) xmtick(0(10)600) graphregion(color(white)) xline(240, lcolor(red) lpattern(shortdash)) ///
	xline(180, lcolor(gray) lpattern(dash)) xline(380, lcolor(gray) lpattern(dash)) ylab(10(10)60) xlab(0(60)600) ///
	legend(lab(1 "Random selection") lab(2 "Selection, {&beta}  0.1") cols(1) ring(0) pos(5)) ///
	ytitle("Age") xtit("Wait time (mins)") name(simselection5, replace) 
graph export "$results/FigE3d.pdf", as(pdf) replace
